package org.dandelion.papercheck.util;

import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.ooxml.POIXMLDocument;
import org.apache.poi.ooxml.extractor.POIXMLTextExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;

import java.io.*;
import java.util.Objects;

/**
 * Static helpers that extract the plain text of Microsoft Word documents
 * using Apache POI.
 */
public class OfficeUtil {
    /**
     * Extracts the text of a Word document using POI's HWPF
     * {@link WordExtractor} (the extractor for the binary {@code .doc} format).
     *
     * @param file the document to read
     * @return the text reported by {@link WordExtractor#getText()}
     * @throws IOException if the file cannot be opened or read
     */
    public static String doc2Text(File file) throws IOException {
        // Both the stream and the extractor are closed automatically, in
        // reverse order, whether or not extraction succeeds.
        try (var is = new FileInputStream(file);
             var extractor = new WordExtractor(is)) {

            return extractor.getText();
        }
    }

    /**
     * Extracts the text of a Word document using POI's XWPF
     * {@link XWPFWordExtractor} (the extractor for the OOXML {@code .docx} format).
     *
     * <p>Failures while opening the package or building the extractor are
     * propagated unchanged to the caller, and the underlying package is
     * released on every path.
     *
     * @param file the document to read
     * @return the text reported by the extractor's {@code getText()}
     * @throws Exception if the package cannot be opened or the document
     *                   cannot be parsed
     */
    public static String docx2Text(File file) throws Exception {
        // If this throws there is nothing to clean up yet, so the original
        // exception reaches the caller as-is.
        final OPCPackage opcPackage = POIXMLDocument.openPackage(file.getPath());

        final POIXMLTextExtractor extractor;
        try {
            extractor = new XWPFWordExtractor(opcPackage);
        } catch (Throwable t) {
            // No extractor exists to take ownership of the package, so release
            // it here. revert() is used rather than close() because close() on
            // a writable package is meant to save it, which must not happen to
            // the caller's file (NOTE(review): confirm the access mode used by
            // openPackage for the POI version in use).
            opcPackage.revert();
            throw t;
        }

        // Closing the extractor releases the package it wraps.
        try (extractor) {
            return extractor.getText();
        }
    }
}
